From f9c2cb6b2751d97705d375a10416730addf5347e Mon Sep 17 00:00:00 2001
From: Boris Sukholitko
Date: Sun, 19 Oct 2014 10:55:06 +0300
Subject: [PATCH] Add k2pdfopt: mobile PDF/DJVU optimizer

Add a derivation for k2pdfopt 2.21. The custom builder unpacks mupdf 1.5,
tesseract 3.02.02 and gocr 0.49, overlays the modified mupdf and tesseract
files shipped in the k2pdfopt source tree, builds all three as static
libraries into a private prefix and links k2pdfopt against them.

k2pdfopt.patch renames willuslib's sort() and get_line() to willus_sort()
and willus_get_line(), defines the global JOB pointer in ocrjocr.c and drops
the USE_CMAKE conditional from willus.h. tesseract.patch adds tesscapi.cpp
to the libtesseract_api sources.

---
NOTE(review): whitespace inside the context lines of the embedded
k2pdfopt.patch / tesseract.patch and the <gocr.h> include target were
reconstructed; verify them against the k2pdfopt v2.21 and tesseract 3.02.02
sources before applying.

 pkgs/applications/misc/k2pdfopt/default.nix   | 110 ++++++++++++++++++
 .../applications/misc/k2pdfopt/k2pdfopt.patch |  95 +++++++++++++++
 .../misc/k2pdfopt/tesseract.patch             |  12 ++
 pkgs/top-level/all-packages.nix               |   2 +
 4 files changed, 219 insertions(+)
 create mode 100644 pkgs/applications/misc/k2pdfopt/default.nix
 create mode 100644 pkgs/applications/misc/k2pdfopt/k2pdfopt.patch
 create mode 100644 pkgs/applications/misc/k2pdfopt/tesseract.patch

diff --git a/pkgs/applications/misc/k2pdfopt/default.nix b/pkgs/applications/misc/k2pdfopt/default.nix
new file mode 100644
index 000000000000..34bef86f4380
--- /dev/null
+++ b/pkgs/applications/misc/k2pdfopt/default.nix
@@ -0,0 +1,110 @@
+# Build procedure lifted from https://aur.archlinux.org/packages/k2/k2pdfopt/PKGBUILD
+{ stdenv, fetchzip, fetchurl, writeScript, libX11, libXext, autoconf, automake, libtool
+  , leptonica, libpng, libtiff, zlib, openjpeg, freetype, jbig2dec, djvulibre
+  , openssl }:
+# Third-party sources that the builder unpacks, builds and links statically.
+let
+  mupdf_src = fetchurl {
+    url = "http://www.mupdf.com/downloads/archive/mupdf-1.5-source.tar.gz";
+    sha256 = "0sl47zqf4c9fhs4h5zg046vixjmwgy4vhljhr5g4md733nash7z4";
+  };
+
+  tess_src = fetchurl {
+    url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz";
+    sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96";
+  };
+
+  gocr_src = fetchurl {
+    url = "http://www-e.uni-magdeburg.de/jschulen/ocr/gocr-0.49.tar.gz";
+    sha256 = "06hpzp7rkkwfr1fvmc8kcfz9v490i9yir7f7imh13gmka0fr6afc";
+  };
+
+in stdenv.mkDerivation {
+  name = "k2pdfopt-2.21";
+  src = fetchzip {
+    url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.21_src.zip";
+    sha256 = "1vy0yw41z6p95gmivjk4r534zbg0kqap4lr9ps56kvjw51q8r54j";
+  };
+
+  buildInputs = [ libX11 libXext autoconf automake libtool leptonica libpng libtiff zlib
+    openjpeg freetype jbig2dec djvulibre openssl ];
+  NIX_LDFLAGS = "-lX11 -lXext";
+  # Exported to the builder environment as $k2_pa and $tess_pa.
+  k2_pa = ./k2pdfopt.patch;
+  tess_pa = ./tesseract.patch;
+  # Custom builder: builds the bundled libraries into $plibs, then k2pdfopt itself.
+  builder = writeScript "builder.sh" ''
+    . ${stdenv}/setup
+    set -e
+    # Private prefix that receives the statically built mupdf/tesseract/gocr.
+    plibs=`pwd`/patched_libraries
+    # mupdf: overlay the modified sources shipped in $src/mupdf_mod.
+    tar zxf ${mupdf_src}
+    cp $src/mupdf_mod/font.c $src/mupdf_mod/string.c mupdf-1.5-source/source/fitz/
+    cp $src/mupdf_mod/pdf-* mupdf-1.5-source/source/pdf
+    # tesseract: overlay the modified sources and add tesscapi.cpp plus its headers.
+    tar zxf ${tess_src}
+    cp $src/tesseract_mod/dawg.cpp tesseract-ocr/dict
+    cp $src/tesseract_mod/tessdatamanager.cpp tesseract-ocr/ccutil
+    cp $src/tesseract_mod/tessedit.cpp tesseract-ocr/ccmain
+    cp $src/tesseract_mod/tesscapi.cpp tesseract-ocr/api
+    cp $src/include_mod/tesseract.h $src/include_mod/leptonica.h tesseract-ocr/api
+    # Writable copy of the k2pdfopt tree so that it can be patched.
+    cp -a $src k2pdfopt_v2.21
+    chmod -R +w k2pdfopt_v2.21
+    # Both patches name paths relative to the build directory, hence -p0.
+    patch -p0 -i $tess_pa
+    patch -p0 -i $k2_pa
+    # Build tesseract as a static library, pointing configure at the leptonica headers.
+    cd tesseract-ocr
+    ./autogen.sh
+    substituteInPlace "configure" \
+      --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \
+      'LIBLEPT_HEADERSDIR=${leptonica}/include'
+    ./configure --prefix=$plibs --disable-shared
+    make install
+    # gocr: build only the library and copy it and its headers into $plibs by hand.
+    cd ..
+    tar zxf ${gocr_src}
+    cd gocr-0.49
+    ./configure
+    cp src/{gocr.h,pnm.h,unicode.h,list.h} $plibs/include
+    cp include/config.h $plibs/include
+    make libs
+    cp src/libPgm2asc.a $plibs/lib
+    # mupdf: install into $plibs; libmujs.a is copied separately from the build tree.
+    cd ../mupdf-1.5-source
+    make prefix=$plibs install
+    install -Dm644 build/debug/libmujs.a $plibs/lib
+    # k2pdfopt libraries are compiled directly with gcc and archived with ar.
+    cd ../k2pdfopt_v2.21/k2pdfoptlib
+    gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include \
+      -I . -I ../willuslib
+    ar rcs libk2pdfopt.a *.o
+
+    cd ../willuslib
+    gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include
+    ar rcs libwillus.a *.o
+    # Compile the entry point with gcc, then link everything with g++.
+    cd ..
+    gcc -Wall -Ofast -o k2pdfopt.o -c k2pdfopt.c -I k2pdfoptlib/ -I willuslib/ \
+      -I include_mod/ -I $plibs/include
+    g++ -Ofast k2pdfopt.o -o k2pdfopt -I willuslib/ -I k2pdfoptlib/ -I include_mod/ \
+      -I $plibs/include -L $plibs/lib/ \
+      -L willuslib/ -L k2pdfoptlib/ -lk2pdfopt -lwillus -ldjvulibre -lz -lmupdf \
+      -ljbig2dec -ljpeg -lopenjp2 -lpng -lfreetype -lpthread -lmujs \
+      -lPgm2asc -llept -ltesseract -lcrypto
+
+    mkdir -p $out/bin
+    cp k2pdfopt $out/bin
+  '';
+
+  meta = with stdenv.lib; {
+    description = "Optimizes PDF/DJVU files for mobile e-readers (e.g. the Kindle) and smartphones";
+    homepage = "http://www.willus.com/k2pdfopt";
+    license = licenses.gpl3;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.bosu ];
+  };
+}
+
diff --git a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch b/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch
new file mode 100644
index 000000000000..00ac5770ea42
--- /dev/null
+++ b/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch
@@ -0,0 +1,95 @@
+diff -aur k2pdfopt_v2.21/willuslib/array.c k2pdfopt_v2.21.new/willuslib/array.c
+--- k2pdfopt_v2.21/willuslib/array.c	2014-05-23 16:29:58.000000000 -0300
++++ k2pdfopt_v2.21.new/willuslib/array.c	2014-07-26 11:35:49.829825567 -0300
+@@ -1055,7 +1055,7 @@
+ void arrayf_sort(float *a,int n)
+ 
+     {
+-    sort(a,(long)n);
++    willus_sort(a,(long)n);
+     }
+ 
+ 
+diff -aur k2pdfopt_v2.21/willuslib/math.c k2pdfopt_v2.21.new/willuslib/math.c
+--- k2pdfopt_v2.21/willuslib/math.c	2013-08-15 21:33:50.000000000 -0300
++++ k2pdfopt_v2.21.new/willuslib/math.c	2014-07-26 11:36:02.853170659 -0300
+@@ -532,7 +532,7 @@
+ 
+ 
+ 
+-void sort(float *x,int n)
++void willus_sort(float *x,int n)
+ 
+     {
+     int top,n1;
+diff -aur k2pdfopt_v2.21/willuslib/ocrjocr.c k2pdfopt_v2.21.new/willuslib/ocrjocr.c
+--- k2pdfopt_v2.21/willuslib/ocrjocr.c	2012-11-12 13:09:42.000000000 -0300
++++ k2pdfopt_v2.21.new/willuslib/ocrjocr.c	2014-07-26 11:36:46.699837185 -0300
+@@ -29,6 +29,8 @@
+ #ifdef HAVE_GOCR_LIB
+ #include <gocr.h>
+ 
++job_t *JOB;
++
+ /*
+ ** bmp8 must be grayscale
+ ** (x1,y1) and (x2,y2) from top left of bitmap
+@@ -66,6 +68,7 @@
+     h=y2-y1+1;
+     dh=h+bw*2;
+     job=&_job;
++    JOB=job;
+     job_init(job);
+     job_init_image(job);
+ // willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10);
+diff -aur k2pdfopt_v2.21/willuslib/string.c k2pdfopt_v2.21.new/willuslib/string.c
+--- k2pdfopt_v2.21/willuslib/string.c	2014-02-03 00:37:44.000000000 -0300
++++ k2pdfopt_v2.21.new/willuslib/string.c	2014-07-26 11:37:01.766506277 -0300
+@@ -81,7 +81,7 @@
+ ** Returns NULL if EOF, otherwise returns pointer to the string.
+ **
+ */
+-char *get_line(char *buf,int max,FILE *f)
++char *willus_get_line(char *buf,int max,FILE *f)
+ 
+     {
+     int i;
+diff -aur k2pdfopt_v2.21/willuslib/willus.h k2pdfopt_v2.21.new/willuslib/willus.h
+--- k2pdfopt_v2.21/willuslib/willus.h	2014-07-25 15:03:51.000000000 -0300
++++ k2pdfopt_v2.21.new/willuslib/willus.h	2014-07-26 11:37:56.316506038 -0300
+@@ -214,9 +214,6 @@
+ ** CMAKE handles the defines, not this source
+ ** (Mod from Dirk Thierbach, 31-Dec-2013)
+ */
+-#ifdef USE_CMAKE
+-#include "config.h"
+-#else /* USE_CMAKE */
+ 
+ #ifndef HAVE_Z_LIB
+ #define HAVE_Z_LIB
+@@ -268,7 +265,6 @@
+ #undef HAVE_GSL_LIB
+ #endif
+ 
+-#endif /* USE_CMAKE */
+ /*
+ ** Consistency check
+ */
+@@ -533,7 +529,7 @@
+ int *n,FILE *err);
+ int readxyz_ex (char *filename,double **x,double **y,double **z,
+ int *n,FILE *err,int ignore_after_semicolon);
+-void sort (float *x,int n);
++void willus_sort (float *x,int n);
+ void sortd (double *x,int n);
+ void sorti (int *x,int n);
+ void sortxy (float *x,float *y,int n);
+@@ -602,7 +598,7 @@
+ /* string.c */
+ void clean_line (char *buf);
+ void clean_line_end(char *buf);
+-char *get_line (char *buf,int max,FILE *f);
++char *willus_get_line (char *buf,int max,FILE *f);
+ char *get_line_cf (char *buf,int max,FILE *f);
+ int mem_get_line_cf(char *buf,int maxlen,char *cptr,long *cindex,long csize);
+ int in_string (char *buffer,char *pattern);
diff --git a/pkgs/applications/misc/k2pdfopt/tesseract.patch b/pkgs/applications/misc/k2pdfopt/tesseract.patch
new file mode 100644
index 000000000000..5cb6e0fe3176
--- /dev/null
+++ b/pkgs/applications/misc/k2pdfopt/tesseract.patch
@@ -0,0 +1,12 @@
+diff -aur tesseract-ocr/api/Makefile.am tesseract-ocr.new/api/Makefile.am
+--- tesseract-ocr/api/Makefile.am	2012-10-09 14:18:39.000000000 -0300
++++ tesseract-ocr.new/api/Makefile.am	2014-03-20 18:43:13.926030341 -0300
+@@ -36,7 +36,7 @@
+ if VISIBILITY
+ libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS
+ endif
+-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp
++libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp tesscapi.cpp
+ 
+ lib_LTLIBRARIES += libtesseract.la
+ libtesseract_la_LDFLAGS = 
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index c29ca8a04701..aa9071a9cf38 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -1486,6 +1486,8 @@ let
 
   jwhois = callPackage ../tools/networking/jwhois { };
 
+  k2pdfopt = callPackage ../applications/misc/k2pdfopt { };
+
   kazam = callPackage ../applications/video/kazam { };
 
   kalibrate-rtl = callPackage ../tools/misc/kalibrate-rtl { };