nixpkgs/pkgs/applications/graphics/tesseract/4.x.nix
Matthew Justin Bauer 2eacddf0dc treewide: homepage URL fixes (#28475)
* pgadmin: use https homepage

* msn-pecan: move homepage to github

google code is now unavailable

* pidgin-latex: use https for homepage

* pidgin-opensteamworks: use github for homepage

google code is unavailable

* putty: use https for homepage

* ponylang: use https for homepage

* picolisp: use https for homepage

* phonon: use https for homepage

* pugixml: use https for homepage

* pioneer: use https for homepage

* packer: use https for homepage

* pokerth: use https for homepage

* procps-ng: use https for homepage

* pycaml: use https for homepage

* proot: move homepage to .github.io

* pius: use https for homepage

* pdfread: use https for homepage

* postgresql: use https for homepage

* ponysay: move homepage to new site

* prometheus: use https for homepage

* powerdns: use https for homepage

* pm-utils: use https for homepage

* patchelf: move homepage to https

* tesseract: move homepage to github

* quodlibet: move homepage from google code

* jbrout: move homepage from google code

* eiskaltdcpp: move homepage to github

* nodejs: use https for homepage

* nix: use https for homepage

* pdf2djvu: move homepage from google code

* game-music-emu: move homepage from google code

* vacuum: move homepage from google code
2017-08-22 20:50:04 +02:00

62 lines
2.1 KiB
Nix

# Tesseract OCR engine, built from a pinned git snapshot of the 4.00.00alpha
# branch, with trained language data taken from the upstream `tessdata'
# repository.
#
# Arguments:
#   enableLanguages  list of language codes (e.g. [ "eng" "deu" ]) whose
#                    `<code>.traineddata' files are installed, or `null' to
#                    install every language found in `tessdata'.  Duplicates
#                    are ignored; an empty list installs no language data.
{ stdenv, fetchFromGitHub, autoreconfHook, autoconf-archive, pkgconfig
, leptonica, libpng, libtiff, icu, pango, opencl-headers
# Supported list of languages or `null' for all available languages
, enableLanguages ? null
}:

stdenv.mkDerivation rec {
  name = "tesseract-${version}";
  version = "4.00.00alpha-git-20170410";

  src = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tesseract";
    rev = "36a995bdc92eb2dd8bc5a63205708944a3f990a1";
    sha256 = "0xz3krvap8sdm27v1dyb34lcdmx11wzvxyszpppfsfmjgkvg19bq";
  };

  # Trained language data.  Passed as a derivation attribute so that the
  # `postInstall' script below can read it as `$tessdata'.
  tessdata = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tessdata";
    rev = "8bf2e7ad08db9ca174ae2b0b3a7498c9f1f71d40";
    sha256 = "0idwkv4qsmmqhrxcgyhy32yldl3vk054m7dkv4fjswfnalgsx794";
  };

  nativeBuildInputs = [ pkgconfig autoreconfHook autoconf-archive ];
  buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];

  # Copy the .traineddata files of the languages specified in enableLanguages
  # into `$out/share/tessdata' and check afterwards if copying was successful.
  postInstall = let
    # Requested languages with duplicates removed.  `find' reports every
    # matching file only once, so counting a repeated entry twice would make
    # the sanity check below fail although everything was copied.
    languages =
      if enableLanguages == null
      then null
      else stdenv.lib.unique enableLanguages;

    # One case-insensitive `find' name test per language.
    mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}";
    mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg;

    # Selection expression handed to `find':
    #   null -> every *.traineddata file
    #   []   -> `-false', i.e. match nothing; an empty `\( \)' group would be
    #           rejected by find as an invalid expression
    #   else -> the requested languages, OR-ed together
    findLangArgs =
      if languages == null then "-iname '*.traineddata'"
      else if languages == [] then "-false"
      else "\\( ${mkFindArgs languages} \\)";
  in ''
    numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \
      ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)"

    ${if languages != null then ''
      expected=${toString (builtins.length languages)}
    '' else ''
      expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)"
    ''}

    if [ "$numLangs" -ne "$expected" ]; then
      echo "Expected $expected languages, but $numLangs" \
        "were copied to \`$out/share/tessdata'" >&2
      exit 1
    fi
  '';

  meta = {
    description = "OCR engine";
    homepage = "https://github.com/tesseract-ocr/tesseract";
    license = stdenv.lib.licenses.asl20;
    maintainers = with stdenv.lib.maintainers; [viric];
    platforms = with stdenv.lib.platforms; linux;
  };
}