2017-04-08 01:43:18 +01:00
|
|
|
{ stdenv
, fetchFromGitHub
, autoreconfHook
, pkgconfig
, leptonica
, libpng
, libtiff
, icu
, pango
, opencl-headers

  # Languages to install trained data for, e.g. [ "eng" ]; each entry is
  # suffixed with `.traineddata` to select a file from the tessdata
  # repository.  `null` selects every available language.
, enableLanguages ? null

  # Optional output hash for the selected language data.  When it is not
  # null, tessdata is built as a fixed-output derivation.  The default only
  # supplies a hash for the "all languages" case; a custom language list
  # gets no hash unless the caller passes one.
, enableLanguagesHash ? (
    if enableLanguages == null # all languages
    then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw"
    else null
  )
}:
|
|
|
|
|
2018-06-18 23:03:48 +01:00
|
|
|
let
  # Trained language data, copied out of the tesseract-ocr/tessdata
  # repository into $out/share/tessdata.
  tessdata =
    let
      inherit (stdenv.lib) concatMapStringsSep optionalAttrs;

      # Shell word list of the files to install, relative to $src: a glob
      # matching every trained-data file when no language list was given,
      # otherwise one `<language>.traineddata` entry per requested language.
      languageFiles =
        if enableLanguages == null
        then "*.traineddata"
        else concatMapStringsSep " " (language: language + ".traineddata") enableLanguages;

      # Only present when a hash was supplied; these attributes turn the
      # derivation into a fixed-output derivation.
      fixedOutputAttrs = optionalAttrs (enableLanguagesHash != null) {
        outputHash = enableLanguagesHash;
        outputHashAlgo = "sha256";
        outputHashMode = "recursive";
      };
    in
    stdenv.mkDerivation ({
      name = "tessdata";
      preferLocalBuild = true;

      src = fetchFromGitHub {
        owner = "tesseract-ocr";
        repo = "tessdata";
        # When bumping this revision, remember to also update the default
        # value of enableLanguagesHash.
        rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
        sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
      };

      # The script enters $src so that the glob form of the file list is
      # expanded against the repository contents.
      buildCommand = ''
        cd $src;
        for lang in ${languageFiles} ; do
        install -Dt $out/share/tessdata $src/$lang ;
        done;
      '';
    } // fixedOutputAttrs);
in
|
|
|
|
|
2013-06-11 18:22:30 +01:00
|
|
|
# The tesseract OCR engine itself, built from the upstream GitHub tag that
# matches `version`, with the selected trained language data linked into
# $out/share/tessdata after installation.
stdenv.mkDerivation rec {
  name = "tesseract-${version}";
  version = "3.05.00";

  src = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tesseract";
    rev = version;
    sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30";
  };

  enableParallelBuilding = true;

  nativeBuildInputs = [ pkgconfig autoreconfHook ];
  buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];

  # Exported to the build as an environment variable pointing at the
  # leptonica headers (presumably read by tesseract's configure script --
  # confirm against upstream before removing).
  LIBLEPT_HEADERSDIR = "${leptonica}/include";

  postInstall = ''
    # Make sure the destination is a directory, so that every `ln -s` below
    # creates a link inside it instead of a symlink named "tessdata".
    mkdir -p "$out/share/tessdata"
    for i in ${tessdata}/share/tessdata/*; do
      ln -s "$i" "$out/share/tessdata"
    done
  '';

  meta = {
    description = "OCR engine";
    # Quoted string rather than a bare URL literal (deprecated by RFC 45);
    # both forms evaluate to the same string.
    homepage = "https://github.com/tesseract-ocr/tesseract";
    license = stdenv.lib.licenses.asl20;
    maintainers = with stdenv.lib.maintainers; [ viric ];
    platforms = with stdenv.lib.platforms; linux ++ darwin;
  };
}
|